bitkeeper revision 1.15.3.2 (3e37b8332YRktwAjVLsh2PyFFW2XNw)
author akw27@boulderdash.cl.cam.ac.uk <akw27@boulderdash.cl.cam.ac.uk>
Wed, 29 Jan 2003 11:17:07 +0000 (11:17 +0000)
committer akw27@boulderdash.cl.cam.ac.uk <akw27@boulderdash.cl.cam.ac.uk>
Wed, 29 Jan 2003 11:17:07 +0000 (11:17 +0000)
RX data is now moved in a domain-memory page, but still copied at the end.

xen-2.4.16/drivers/net/tulip/interrupt.c
xen-2.4.16/include/asm-i386/pci.h
xen-2.4.16/include/xeno/skbuff.h
xen-2.4.16/net/dev.c
xen-2.4.16/net/eth.c
xen-2.4.16/net/skbuff.c

index c92b12ea9250ef6cc984ec7737fd613be69186f7..8e88f2f4579139b8f7731ee141bd567bce0a938e 100644 (file)
@@ -170,8 +170,9 @@ static int tulip_rx(struct net_device *dev)
 #endif
                        /* Check if the packet is long enough to accept without copying
                           to a minimally-sized skbuff. */
-                       if (pkt_len < tulip_rx_copybreak
-                               && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
+                       //if (pkt_len < tulip_rx_copybreak
+                       //      && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
+                        if (0) {
                                skb->dev = dev;
                                skb_reserve(skb, 2);    /* 16 byte align the IP header */
                                pci_dma_sync_single(tp->pdev,
index 9ab9c282fed6f1577e283ee6905cb617e0ddd67c..43fab42762db2695e510fde0ac6ddf36526a2352 100644 (file)
@@ -75,7 +75,19 @@ static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
        if (direction == PCI_DMA_NONE)
                BUG();
        flush_write_buffers();
-       return virt_to_bus(ptr);
+
+        if ((unsigned long) ptr > PAGE_OFFSET)
+           return virt_to_bus(ptr);
+
+        /* If an address that is not in hypervisor VM is passed to this 
+         * function (ie <= PAGE_OFFSET) we assume that the passer knows 
+         * what they are doing, and have passed a physical address that 
+         * should not be converted here.  This is a little hackish, but 
+         * is being added to allow references to domain memory in order 
+         * to support zero-copy network code.
+         */
+        
+        return (dma_addr_t) ptr;
 }
 
 /* Unmap a single streaming mode DMA translation.  The dma_addr and size
index f9c38c12bf8eda25de507863851cd0484ee22b60..dd6257b370ff62159b175848c78aa6850a6ab8af 100644 (file)
 #define VIF_DROP                -3
 #define VIF_ANY_INTERFACE       -4
 
+//skb_type values:
+#define SKB_NORMAL               0
+#define SKB_ZERO_COPY            1
+
 #define HAVE_ALLOC_SKB         /* For the drivers to know */
 #define HAVE_ALIGNABLE_SKB     /* Ditto 8)                */
 #define SLAB_SKB               /* Slabified skbuffs       */
@@ -187,7 +191,7 @@ struct sk_buff {
        unsigned int    data_len;
        unsigned int    csum;                   /* Checksum                                     */
        unsigned char   __unused,               /* Dead field, may be reused                    */
-                       cloned,                 /* head may be cloned (check refcnt to be sure). */
+                       cloned,                 /* head may be cloned (check refcnt to be sure) */
                        pkt_type,               /* Packet class                                 */
                        ip_summed;              /* Driver fed us an IP checksum                 */
        __u32           priority;               /* Packet queueing priority                     */
@@ -203,8 +207,12 @@ struct sk_buff {
 
        void            (*destructor)(struct sk_buff *);        /* Destruct function            */
 
-        int src_vif;                            /* vif we came from */
-        int dst_vif;                            /* vif we are bound for */
+        unsigned int    skb_type;               /* SKB_NORMAL or SKB_ZERO_COPY                  */
+        struct pfn_info *pf;                    /* record of physical pf address for freeing    */
+        int src_vif;                            /* vif we came from                             */
+        int dst_vif;                            /* vif we are bound for                         */
+        struct skb_shared_info shinfo;          /* shared info is no longer shared in Xen.      */
+        
 
                 
         
@@ -244,6 +252,7 @@ struct sk_buff {
 
 extern void                    __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *                alloc_skb(unsigned int size, int priority);
+extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
 extern void                    kfree_skbmem(struct sk_buff *skb);
 extern struct sk_buff *                skb_clone(struct sk_buff *skb, int priority);
 extern struct sk_buff *                skb_copy(const struct sk_buff *skb, int priority);
@@ -259,7 +268,8 @@ extern void skb_over_panic(struct sk_buff *skb, int len, void *here);
 extern void    skb_under_panic(struct sk_buff *skb, int len, void *here);
 
 /* Internal */
-#define skb_shinfo(SKB)                ((struct skb_shared_info *)((SKB)->end))
+//#define skb_shinfo(SKB)              ((struct skb_shared_info *)((SKB)->end))
+#define skb_shinfo(SKB)     ((struct skb_shared_info *)(&(SKB)->shinfo))
 
 /**
  *     skb_queue_empty - check if a queue is empty
@@ -1045,7 +1055,8 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 {
        struct sk_buff *skb;
 
-       skb = alloc_skb(length+16, gfp_mask);
+       //skb = alloc_skb(length+16, gfp_mask);
+        skb = alloc_zc_skb(length+16, gfp_mask);
        if (skb)
                skb_reserve(skb,16);
        return skb;
index 9d02fb3f2893af8042309ea30e8fa18035511733..749d50aa3f56d7b085c7fb4b8b610f3ed77ec6ec 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/pkt_sched.h>
 
 #include <linux/event.h>
+#include <asm/domain_page.h>
 
 #define BUG_TRAP ASSERT
 #define notifier_call_chain(_a,_b,_c) ((void)0)
@@ -695,6 +696,21 @@ int netif_rx(struct sk_buff *skb)
        if (skb->stamp.tv_sec == 0)
                get_fast_time(&skb->stamp);
 
+        /* Attempt to handle zero-copy packets here: */
+        if (skb->skb_type == SKB_ZERO_COPY)
+        {
+                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
+
+                /* remapping this address really screws up all the skb pointers.  We need 
+                 * to map them all here sufficiently to get the packet demultiplexed.
+                 */
+                
+                skb->data = skb->head;
+                skb_reserve(skb,16); // need to ensure that all the drivers and not just tulip do this.
+                skb->mac.raw = skb->data;
+                skb->data += ETH_HLEN;
+        }
+        
        /* The code is rearranged so that the path is the most
           short when CPU is congested, but is still operating.
         */
@@ -747,10 +763,18 @@ drop:
        netdev_rx_stat[this_cpu].dropped++;
        local_irq_restore(flags);
 
+        if (skb->skb_type == SKB_ZERO_COPY)
+                unmap_domain_mem(skb->head);
+        
        kfree_skb(skb);
        return NET_RX_DROP;
 
 found:
+        if (skb->skb_type == SKB_ZERO_COPY) {
+                unmap_domain_mem(skb->head);
+                //skb->head = (u8 *)((skb->pf - frame_table) << PAGE_SHIFT);
+                skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
+        }
         hyp_event_notify(cpu_mask);
         local_irq_restore(flags);
         return 0;
@@ -930,8 +954,28 @@ void flush_rx_queue(void)
                     rx = shadow_ring->rx_ring+i;
                     if ( (skb->len + ETH_HLEN) < rx->size )
                         rx->size = skb->len + ETH_HLEN;
+
+                    /* remap the packet again.  This is very temporary and will shortly be
+                     * replaced with a page swizzle.
+                     */
+
+                    if (skb->skb_type == SKB_ZERO_COPY)
+                    {
+                        skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
+                        skb->data = skb->head;
+                        skb_reserve(skb,16); 
+                        skb->mac.raw = skb->data;
+                        skb->data += ETH_HLEN;
+                    }
+                                                                        
                     copy_to_user((void *)rx->addr, skb->mac.raw, rx->size);
                     copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx));
+                    
+                    if (skb->skb_type == SKB_ZERO_COPY)
+                    {
+                        unmap_domain_mem(skb->head);
+                        skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
+                    }
                 }
                 net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
                 if ( net_ring->rx_cons == net_ring->rx_event )
index d982eef39b5ada0734706d3256024ad6bce51e19..5238de022ef021d55af57bfdf2567abf7ac18fcc 100644 (file)
@@ -161,52 +161,62 @@ unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
        struct ethhdr *eth;
        unsigned char *rawp;
        
-       skb->mac.raw=skb->data;
-       skb_pull(skb,dev->hard_header_len);
-       eth= skb->mac.ethernet;
+        if (skb->skb_type == SKB_ZERO_COPY)
+        {
+            skb_pull(skb,dev->hard_header_len);
+            skb->mac.raw= (void *)0xdeadbeef;
+            return htons(ETH_P_802_2);
+            
+        } else { // SKB_NORMAL
+        
+           skb->mac.raw=skb->data;
+           skb_pull(skb,dev->hard_header_len);
+           eth= skb->mac.ethernet;
        
-       if(*eth->h_dest&1)
-       {
-               if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
+           if(*eth->h_dest&1)
+           {
+               if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
                        skb->pkt_type=PACKET_BROADCAST;
                else
                        skb->pkt_type=PACKET_MULTICAST;
-       }
+           }
        
-       /*
-        *      This ALLMULTI check should be redundant by 1.4
-        *      so don't forget to remove it.
-        *
-        *      Seems, you forgot to remove it. All silly devices
-        *      seems to set IFF_PROMISC.
-        */
+           /*
+           *   This ALLMULTI check should be redundant by 1.4
+           *   so don't forget to remove it.
+           *
+           *   Seems, you forgot to remove it. All silly devices
+           *   seems to set IFF_PROMISC.
+           */
         
-       else if(1 /*dev->flags&IFF_PROMISC*/)
-       {
+           else if(1 /*dev->flags&IFF_PROMISC*/)
+           {
                if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
                        skb->pkt_type=PACKET_OTHERHOST;
-       }
+           }
        
-       if (ntohs(eth->h_proto) >= 1536)
+           if (ntohs(eth->h_proto) >= 1536)
                return eth->h_proto;
                
-       rawp = skb->data;
+           rawp = skb->data;
        
-       /*
-        *      This is a magic hack to spot IPX packets. Older Novell breaks
-        *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
-        *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
-        *      won't work for fault tolerant netware but does for the rest.
-        */
-       if (*(unsigned short *)rawp == 0xFFFF)
+           /*
+           *   This is a magic hack to spot IPX packets. Older Novell breaks
+           *   the protocol design and runs IPX over 802.3 without an 802.2 LLC
+           *   layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
+           *   won't work for fault tolerant netware but does for the rest.
+           */
+           if (*(unsigned short *)rawp == 0xFFFF)
                return htons(ETH_P_802_3);
                
-       /*
-        *      Real 802.2 LLC
-        */
-       return htons(ETH_P_802_2);
+           /*
+           *   Real 802.2 LLC
+           */
+           return htons(ETH_P_802_2);
+        }
 }
 
+
 int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
        struct ethhdr *eth = skb->mac.ethernet;
index 07896fda7894e845e568f289e02d7997281045b3..abd2c73ed1f79c572b8298d7c467757edc2daa35 100644 (file)
@@ -149,6 +149,102 @@ static __inline__ void skb_head_to_pool(struct sk_buff *skb)
        kmem_cache_free(skbuff_head_cache, skb);
 }
 
+static inline u8 *alloc_skb_data_page(struct sk_buff *skb)
+{
+        struct list_head *list_ptr;
+        struct pfn_info  *pf;
+        unsigned long flags;
+        
+        spin_lock_irqsave(&free_list_lock, flags);
+
+        if (!free_pfns) return NULL;
+
+        list_ptr = free_list.next;
+        pf = list_entry(list_ptr, struct pfn_info, list);
+        pf->flags = 0; // owned by dom0
+        list_del(&pf->list);
+        pf->next = pf->prev = (pf - frame_table);
+        free_pfns--;
+
+        spin_unlock_irqrestore(&free_list_lock, flags);
+
+        skb->pf = pf;
+        return (u8 *)((pf - frame_table) << PAGE_SHIFT);
+}
+
+static inline void dealloc_skb_data_page(struct sk_buff *skb)
+{
+        struct pfn_info  *pf;
+        unsigned long flags;
+
+        pf = skb->pf;
+
+        spin_lock_irqsave(&free_list_lock, flags);
+
+        list_add_tail(&pf->list, &free_list);
+        free_pfns++;
+
+        spin_unlock_irqrestore(&free_list_lock, flags);
+}
+
+struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
+{
+        struct sk_buff *skb;
+        u8 *data;
+
+        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
+                static int count = 0;
+                if (++count < 5) {
+                        printk(KERN_ERR "alloc_skb called nonatomically "
+                               "from interrupt %p\n", NET_CALLER(size));
+                        BUG();
+                }
+                gfp_mask &= ~__GFP_WAIT;
+        }
+
+        /* Get the HEAD */
+        skb = skb_head_from_pool();
+        if (skb == NULL) {
+                skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
+                if (skb == NULL)
+                        goto nohead;
+        }
+
+        /* Get the DATA. Size must match skb_add_mtu(). */
+        size = SKB_DATA_ALIGN(size);
+        data = alloc_skb_data_page(skb);
+        if (data == NULL)
+                goto nodata;
+
+        /* XXX: does not include slab overhead */
+        skb->truesize = size + sizeof(struct sk_buff);
+
+        /* Load the data pointers. */
+        skb->head = data;
+        skb->data = data;
+        skb->tail = data;
+        skb->end = data + size;
+
+        /* Set up other state */
+        skb->len = 0;
+        skb->cloned = 0;
+        skb->data_len = 0;
+        skb->src_vif = VIF_UNKNOWN_INTERFACE;
+        skb->dst_vif = VIF_UNKNOWN_INTERFACE;
+        skb->skb_type = SKB_ZERO_COPY;
+
+        atomic_set(&skb->users, 1);
+        atomic_set(&(skb_shinfo(skb)->dataref), 1);
+        skb_shinfo(skb)->nr_frags = 0;
+        skb_shinfo(skb)->frag_list = NULL;
+        return skb;
+
+nodata:
+        skb_head_to_pool(skb);
+nohead:
+        return NULL;
+}
+
 
 /*     Allocate a new skbuff. We do this ourselves so we can fill in a few
  *     'private' fields and also do memory statistics to find all the
@@ -213,6 +309,7 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
        skb->data_len = 0;
         skb->src_vif = VIF_UNKNOWN_INTERFACE;
         skb->dst_vif = VIF_UNKNOWN_INTERFACE;
+        skb->skb_type = SKB_NORMAL;
 
        atomic_set(&skb->users, 1); 
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
@@ -295,7 +392,13 @@ static void skb_release_data(struct sk_buff *skb)
                if (skb_shinfo(skb)->frag_list)
                        skb_drop_fraglist(skb);
 
-               kfree(skb->head);
+                if (skb->skb_type == SKB_NORMAL) {
+                   kfree(skb->head);
+                } else if (skb->skb_type == SKB_ZERO_COPY) {
+                    dealloc_skb_data_page(skb);
+                } else {
+                    printk("skb_release_data called with unknown skb type!\n");
+                }
        }
 }